---
title: "Figures 2 and 3"
output: html_notebook
---

```{r setup}
# Initial Version: Created on August 10, 2018.
# Last Checked: Verified for functionality on February 19, 2025.
# The current version is compatible with R version 4.3.0 and R Studio version 2024.12.0+467.

# Packages used by the chunks below:
#   tidyverse - pipes, data reshaping and ggplot2
#   haven     - read_dta() for the Stata .dta inputs
#   lemon     - facet_rep_wrap() used in Figure 2
library(tidyverse)
library(haven)
library(lemon)

# Please check the working directory: all paths in this notebook are relative
# to it ("../Rdta" for inputs, "../output" for the saved figures).
getwd()
# You can also manually set the working directory.
# Note: pass the variable itself (no quotes); setwd("workdir") would look for
# a directory literally named "workdir".
#workdir <- "....../ReplicationPackage/Rfiles"
#setwd(workdir)

# Center the title in ggplot by modifying the currently active theme.
# NOTE(review): the figure chunks below add theme_classic(), a complete theme,
# and Figures 3A/3B set plot.title (hjust = 0.5) again explicitly.
theme_update(plot.title = element_text(hjust = 0.5))


```

# Figures 2 and 3 in the main text

Run "8_analysis_descriptive.do" before drawing the following figures; the chunks below read their input data from `../Rdta/`.

## Figure 2. Fertility by mother's birth year

```{r}
# Figure 2: proportion of mothers with two-or-more / three-or-more children,
# plotted against the mother's birth year, in separate rural and urban panels.

# Load the Stata file and reshape it to long format so that the fertility
# measure (nchild2 / nchild3) can be mapped to the line type.
fertility_year_census <-
  read_dta("../Rdta/fertility_year_census.dta") %>%
  select(-nchild5, -nchild6) %>%
  # pivot_longer() replaces the superseded gather(); the resulting columns
  # (`n` = measure name, `prop` = its value) are the same.
  pivot_longer(
    cols = nchild2:nchild3,
    names_to = "n",
    values_to = "prop"
  ) %>%
  mutate(
    # Panel titles: 0 -> rural, 1 -> urban.
    urban = factor(urban, levels = c(0, 1), labels = c("A. Rural", "B. Urban"))
  )

# Keep the plot in a named object so ggsave() writes exactly this figure
# instead of depending on whatever last_plot() happens to be.
fig2_fertility <-
  ggplot(fertility_year_census, aes(x = birthy, y = prop, linetype = n)) +
  # One panel per row; repeat the axis tick labels on every panel.
  facet_rep_wrap(~urban, ncol = 1, repeat.tick.labels = TRUE) +
  geom_line() +
  labs(
    x = "Mother's birth year",
    y = "Proportion"
  ) +
  scale_x_continuous(breaks = seq(1940, 1960, 5)) +
  scale_y_continuous(breaks = seq(0, 1, 0.2)) +
  # Labels are matched to the sorted measure names: nchild2, then nchild3.
  scale_linetype_discrete(
    name = "Fertility",
    labels = c("Two or more", "Three or more")
  ) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(size = 12),
    legend.position = "bottom"
  )

# Display the figure in the notebook.
fig2_fertility

ggsave(
  "Figure2_figFertilityYear.pdf",
  plot = fig2_fertility,
  path = "../output",
  width = 8, height = 8
)
```

## Figure 3A. Education by birth year

```{r}
# Figure 3A: share of children who completed each education level, by the
# child's birth year (rural census sample, per the input file name).

# Load the Stata file and reshape to long format: `edulev` holds the column
# name (edulev2..edulev4) and `prop` the corresponding proportion.
edu_year_census_rural <-
  read_dta("../Rdta/edu_year_census_rural.dta") %>%
  mutate(birthy = as.integer(birthy)) %>%
  # pivot_longer() replaces the superseded gather().
  pivot_longer(
    cols = edulev2:edulev4,
    names_to = "edulev",
    values_to = "prop"
  )

# Keep the plot in a named object so ggsave() writes exactly this figure
# instead of depending on whatever last_plot() happens to be.
fig3a_edu_year <-
  ggplot(edu_year_census_rural, aes(x = birthy, y = prop, linetype = edulev)) +
  geom_line() +
  labs(
    x = "Children's birth year",
    y = "Proportion",
    title = "A. Completed education by birth year"
  ) +
  # The x axis is restricted to 1965-1980; observations outside are not drawn.
  scale_x_continuous(
    breaks = seq(1965, 1980, 5),
    limits = c(1965, 1980)
  ) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  # Labels and line types are matched, in order, to edulev2, edulev3, edulev4.
  scale_linetype_manual(
    name = "Education",
    labels = c("Primary school", "Middle school", "High school"),
    values = c("solid", "dashed", "dotdash")
  ) +
  theme_classic() +
  # The legend is hidden; the lines are labelled directly by the text
  # annotations below.
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 16)
  ) +
  annotate("text", x = 1968, y = 0.9, label = "Primary school") +
  annotate("text", x = 1973, y = 0.58, label = "Middle school") +
  annotate("text", x = 1978, y = 0.2, label = "High school")

# Display the figure in the notebook.
fig3a_edu_year

ggsave(
  "Figure3_A_figEduYear.pdf",
  plot = fig3a_edu_year,
  path = "../output",
  width = 8, height = 5
)
```


## Figure 3B. Education by age

```{r}
# Figure 3B: share of children at each education level, by the child's age
# (rural census sample, per the input file name).

# Load the Stata file and reshape to long format: `edulev` holds the column
# name (edulev2..edulev4) and `prop` the corresponding proportion.
edu_age_census_rural <-
  read_dta("../Rdta/edu_age_census_rural.dta") %>%
  mutate(age = as.integer(age)) %>%
  # pivot_longer() replaces the superseded gather().
  pivot_longer(
    cols = edulev2:edulev4,
    names_to = "edulev",
    values_to = "prop"
  )

# Keep the plot in a named object so ggsave() writes exactly this figure
# instead of depending on whatever last_plot() happens to be.
fig3b_edu_age <-
  ggplot(
    edu_age_census_rural,
    aes(x = age, y = prop, linetype = as.factor(edulev))
  ) +
  geom_line() +
  labs(
    x = "Child age",
    y = "Proportion",
    title = "B. Education progress by age"
  ) +
  # Labels and line types are matched, in order, to edulev2, edulev3, edulev4.
  scale_linetype_manual(
    name = "Education",
    labels = c("Primary school", "Middle school", "High school"),
    values = c("solid", "dashed", "dotdash")
  ) +
  scale_x_continuous(breaks = 6:17) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  theme_classic() +
  # The legend is hidden; the lines are labelled directly by the text
  # annotations below.
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 16)
  ) +
  annotate("text", x = 6.8, y = 0.9, label = "Primary school") +
  annotate("text", x = 13, y = 0.52, label = "Middle school") +
  annotate("text", x = 16, y = 0.15, label = "High school")

# Display the figure in the notebook.
fig3b_edu_age

ggsave(
  "Figure3_B_figEduAge.pdf",
  plot = fig3b_edu_age,
  path = "../output",
  width = 8, height = 5
)
```

